Roy 2025 Figures and tables
April 22, 2025
Source:vignettes/Roy2025_Figures_and_Tables.Rmd
Roy2025_Figures_and_Tables.Rmd
Introduction
- Once you’ve downloaded all the requisite source information per the package’s instructions, you can build the package and re-create the Figures and Tables published in Roy2025.
- This vignette is provided to enable anyone to see the source data and methodology behind our publication.
Figure 1
1A - Continental US A11
# Print the `gg_state` plot object (created outside this view).
gg_state
# Show `out_data` as an interactive table: `gf` is rounded to four decimals and
# the renamed columns get display labels; all other columns pass through.
out_data |>
  dplyr::mutate(gf = round(gf, 4)) |>
  dplyr::rename(
    `State` = region,
    `Census Region` = census_region,
    `FIPS` = fips,
    `Population-Adjusted Genotypic Frequency` = gf
  ) |>
  DT::datatable(
    filter = "top",
    rownames = FALSE,
    extensions = "Buttons",
    options = list(
      scrollX = TRUE,
      pageLength = 10,
      dom = "Bfrtip",
      # Export buttons so readers can download the underlying data.
      buttons = c("csv", "excel")
    )
  )
1B - A11:01 by County
# Show `out_data` as an interactive table with five labelled columns.
# `gf` is rounded to four decimals before display.
out_data |>
  dplyr::ungroup() |>
  dplyr::mutate(gf = round(gf, 4)) |>
  # select() renames and orders the columns in a single step.
  dplyr::select(
    `Census Region` = census_region,
    `County` = county,
    `FIPS` = fips,
    `Allele` = allele,
    `Population-Adjusted Genotypic Frequency` = gf
  ) |>
  DT::datatable(
    filter = "top",
    rownames = FALSE,
    extensions = "Buttons",
    options = list(
      scrollX = TRUE,
      pageLength = 10,
      dom = "Bfrtip",
      # Export buttons so readers can download the underlying data.
      buttons = c("csv", "excel")
    )
  )
1C - NMDP Correlations
NMDP values are not publicly available for privacy reasons. Please contact co-author Martin Maiers for inquiries.
1D - A11:01 by CA County
# Print the `gg_a11_in_ca` plot object (created outside this view).
gg_a11_in_ca
# Show `out_data` as an interactive table with five labelled columns.
# `gf` is rounded to four decimals before display.
out_data |>
  dplyr::ungroup() |>
  dplyr::mutate(gf = round(gf, 4)) |>
  # select() renames and orders the columns in a single step.
  dplyr::select(
    `Census Region` = census_region,
    `County` = county,
    `FIPS` = fips,
    `Allele` = allele,
    `Population-Adjusted Genotypic Frequency` = gf
  ) |>
  DT::datatable(
    filter = "top",
    rownames = FALSE,
    extensions = "Buttons",
    options = list(
      scrollX = TRUE,
      pageLength = 10,
      dom = "Bfrtip",
      # Export buttons so readers can download the underlying data.
      buttons = c("csv", "excel")
    )
  )
1E - A11:01 by CA by H4 Hexagon
# Print the plot stored in the `p1` element of `ca_4`.
ca_4$p1
# Show the per-hexagon frequencies stored in `ca_4` as an interactive table.
# The summed frequency is rounded to four decimals before display.
ca_4$genotypic_frequencies_by_hexon |>
  dplyr::ungroup() |>
  dplyr::mutate(us_2020_nmdp_gf_sum = round(us_2020_nmdp_gf_sum, 4)) |>
  # select() renames and orders the columns in a single step.
  dplyr::select(
    `Hexagon ID` = hex,
    `Allele` = allele,
    `Hexagon Population` = total_2020_pop,
    `Population-Adjusted Genotypic Frequency` = us_2020_nmdp_gf_sum
  ) |>
  DT::datatable(
    filter = "top",
    rownames = FALSE,
    extensions = "Buttons",
    options = list(
      scrollX = TRUE,
      pageLength = 10,
      dom = "Bfrtip",
      # Export buttons so readers can download the underlying data.
      buttons = c("csv", "excel")
    )
  )
Figure 2
2B - B58:01 in MS by County
# Print the `gg_b58_in_ms` plot object (created outside this view).
gg_b58_in_ms
# Show `out_data` as an interactive table with five labelled columns.
# `gf` is rounded to four decimals before display.
out_data |>
  dplyr::ungroup() |>
  dplyr::mutate(gf = round(gf, 4)) |>
  # select() renames and orders the columns in a single step.
  dplyr::select(
    `Census Region` = census_region,
    `County` = county,
    `FIPS` = fips,
    `Allele` = allele,
    `Population-Adjusted Genotypic Frequency` = gf
  ) |>
  DT::datatable(
    filter = "top",
    rownames = FALSE,
    extensions = "Buttons",
    options = list(
      scrollX = TRUE,
      pageLength = 10,
      dom = "Bfrtip",
      # Export buttons so readers can download the underlying data.
      buttons = c("csv", "excel")
    )
  )
2C - B58:01 in MS by Hexagon
# Print the plot stored in the `p1` element of `ms_4`.
ms_4$p1
# Show the per-hexagon frequencies stored in `ms_4` as an interactive table.
# The summed frequency is rounded to four decimals before display.
ms_4$genotypic_frequencies_by_hexon |>
  dplyr::ungroup() |>
  dplyr::mutate(us_2020_nmdp_gf_sum = round(us_2020_nmdp_gf_sum, 4)) |>
  # select() renames and orders the columns in a single step.
  dplyr::select(
    `Hexagon ID` = hex,
    `Allele` = allele,
    `Hexagon Population` = total_2020_pop,
    `Population-Adjusted Genotypic Frequency` = us_2020_nmdp_gf_sum
  ) |>
  DT::datatable(
    filter = "top",
    rownames = FALSE,
    extensions = "Buttons",
    options = list(
      scrollX = TRUE,
      pageLength = 10,
      dom = "Bfrtip",
      # Export buttons so readers can download the underlying data.
      buttons = c("csv", "excel")
    )
  )
Tables
Table 1: United States 2020 Census Adjusted HLA-A*11:01 Genotypic Frequencies
# Table 1: join the per-race-code population counts to the census-adjusted
# A*11:01 frequencies, express the three proportion columns as percentages
# rounded to one decimal, and show the result as an interactive table.
CensusHLA::us_pop_multirace_in_nmdp_codes |>
  dplyr::left_join(
    CensusHLA::nmdp_hla_frequencies_by_race_us_2020_census_adjusted |>
      dplyr::filter(allele == "A*11:01") |>
      dplyr::select(
        allele,
        nmdp_race_code,
        us_2020_percent_pop,
        nmdp_calc_gf,
        us_2020_nmdp_gf
      ) |>
      # NOTE(review): left_join() keeps the row order of the left-hand table,
      # so this sort of the right-hand table likely does not order the final
      # output -- confirm whether the sort belongs after the join.
      dplyr::arrange(dplyr::desc(us_2020_percent_pop)),
    # Explicit key: the same column dplyr previously inferred and announced.
    by = "nmdp_race_code"
  ) |>
  # Convert proportions to percentages and round to 1 digit after the decimal.
  dplyr::mutate(
    dplyr::across(
      c(us_2020_percent_pop, nmdp_calc_gf, us_2020_nmdp_gf),
      \(x) round(x * 100, 1)
    )
  ) |>
  dplyr::select(
    `Ethnic Code` = nmdp_race_code,
    `Allele` = allele,
    `Single Race Population` = total_single_race_pop,
    `Multi-Race Population` = total_multiple_race_pop,
    `Total Population` = total_2020_pop,
    `Percentage of Total Pop` = us_2020_percent_pop,
    `NMDP Calculated GF` = nmdp_calc_gf,
    `Population-Adjusted GF` = us_2020_nmdp_gf
  ) |>
  DT::datatable(
    filter = "top",
    rownames = FALSE,
    extensions = "Buttons",
    options = list(
      scrollX = TRUE,
      pageLength = 10,
      dom = "Bfrtip",
      # Export buttons so readers can download the underlying data.
      buttons = c("csv", "excel")
    )
  )
# The join key is now explicit, so dplyr no longer prints
# "Joining with `by = join_by(nmdp_race_code)`" when this chunk runs.
Table 2: HLA-A*11:01 Population-adjusted genotypic frequencies for top 11 NCI Catchment areas.
# Table 2: estimate a per-row patient population (total population times the
# summed population-adjusted frequency), sort by it in descending order, and
# show the result as an interactive table.
# NOTE(review): the heading says "top 11", but nothing here truncates the rows
# and pageLength is commented out, so DataTables' default page length
# applies -- confirm this is intended.
CensusHLA::a11_catchment_summed$sf_tract_centroids_for_all_states_with_catchment_with_us_population_race_code_percentages_by_tract_summed |>
  # NOTE(review): if this object is an sf data frame, select(-geometry) may not
  # drop the geometry column -- verify.
  dplyr::select(-geometry) |>
  dplyr::mutate(patient_pop = total_2020_pop * us_2020_nmdp_gf_sum) |>
  dplyr::arrange(dplyr::desc(patient_pop)) |>
  DT::datatable(
    filter = "top",
    rownames = FALSE,
    extensions = "Buttons",
    options = list(
      scrollX = TRUE,
      # pageLength = 11,
      dom = "Bfrtip",
      # Export buttons so readers can download the underlying data.
      buttons = c("csv", "excel")
    )
  )
## Warning in instance$preRenderHook(instance): It seems your data is too big for
## client-side DataTables. You may consider server-side processing:
## https://rstudio.github.io/DT/server.html
Supplemental Tables
Supplemental Table 1 - California County population-adjusted HLA-A*11:01 Genotypic frequencies
# Show `out_data` as an interactive table with five labelled columns.
# `gf` is rounded to four decimals before display.
out_data |>
  dplyr::ungroup() |>
  dplyr::mutate(gf = round(gf, 4)) |>
  # select() renames and orders the columns in a single step.
  dplyr::select(
    `Census Region` = census_region,
    `County` = county,
    `FIPS` = fips,
    `Allele` = allele,
    `Population-Adjusted Genotypic Frequency` = gf
  ) |>
  DT::datatable(
    filter = "top",
    rownames = FALSE,
    extensions = "Buttons",
    options = list(
      scrollX = TRUE,
      pageLength = 10,
      dom = "Bfrtip",
      # Export buttons so readers can download the underlying data.
      buttons = c("csv", "excel")
    )
  )
Supplemental Table 2 - United States 2020 Census Adjusted HLA-B*58:01 Genotypic Frequencies for Mississippi
# Supplemental Table 2: join the per-race-code population counts to the
# state-level census-adjusted B*58:01 frequencies for Mississippi, express the
# three proportion columns as percentages rounded to one decimal, and show the
# result as an interactive table.
CensusHLA::us_pop_multirace_in_nmdp_codes |>
  dplyr::left_join(
    CensusHLA::census_adjusted_nmdp_hla_frequencies_by_state |>
      dplyr::filter(allele == "B*58:01", census_region == "Mississippi") |>
      dplyr::select(
        allele,
        census_region,
        nmdp_race_code,
        us_2020_percent_pop,
        nmdp_calc_gf,
        us_2020_nmdp_gf
      ) |>
      # NOTE(review): left_join() keeps the row order of the left-hand table,
      # so this sort of the right-hand table likely does not order the final
      # output -- confirm whether the sort belongs after the join.
      dplyr::arrange(dplyr::desc(us_2020_percent_pop)),
    # Explicit key: the same column dplyr previously inferred and announced.
    by = "nmdp_race_code"
  ) |>
  # Convert proportions to percentages and round to 1 digit after the decimal.
  dplyr::mutate(
    dplyr::across(
      c(us_2020_percent_pop, nmdp_calc_gf, us_2020_nmdp_gf),
      \(x) round(x * 100, 1)
    )
  ) |>
  dplyr::select(
    `Region` = census_region,
    `Ethnic Code` = nmdp_race_code,
    `Allele` = allele,
    `Single Race Population` = total_single_race_pop,
    `Multi-Race Population` = total_multiple_race_pop,
    `Total Population` = total_2020_pop,
    `Percentage of Total Pop` = us_2020_percent_pop,
    `NMDP Calculated GF` = nmdp_calc_gf,
    `Population-Adjusted GF` = us_2020_nmdp_gf
  ) |>
  DT::datatable(
    filter = "top",
    rownames = FALSE,
    extensions = "Buttons",
    options = list(
      scrollX = TRUE,
      pageLength = 10,
      dom = "Bfrtip",
      # Export buttons so readers can download the underlying data.
      buttons = c("csv", "excel")
    )
  )
# The join key is now explicit, so dplyr no longer prints
# "Joining with `by = join_by(nmdp_race_code)`" when this chunk runs.
Supplemental Table 3 - Mississippi County population-adjusted HLA-B*58:01 Genotypic frequencies
# Show `out_data` as an interactive table with five labelled columns.
# `gf` is rounded to four decimals before display.
out_data |>
  dplyr::ungroup() |>
  dplyr::mutate(gf = round(gf, 4)) |>
  # select() renames and orders the columns in a single step.
  dplyr::select(
    `Census Region` = census_region,
    `County` = county,
    `FIPS` = fips,
    `Allele` = allele,
    `Population-Adjusted Genotypic Frequency` = gf
  ) |>
  DT::datatable(
    filter = "top",
    rownames = FALSE,
    extensions = "Buttons",
    options = list(
      scrollX = TRUE,
      pageLength = 10,
      dom = "Bfrtip",
      # Export buttons so readers can download the underlying data.
      buttons = c("csv", "excel")
    )
  )
Supplemental Table 4 - HLA-B*58:01 Population-adjusted genotypic frequencies by NCI Catchment areas. {#st4}
# Supplemental Table 4: estimate a per-row patient population (total
# population times the summed population-adjusted frequency), sort by it in
# descending order, and show the result as an interactive table, 11 rows per
# page.
CensusHLA::b58_catchment_summed$sf_tract_centroids_for_all_states_with_catchment_with_us_population_race_code_percentages_by_tract_summed |>
  # NOTE(review): if this object is an sf data frame, select(-geometry) may not
  # drop the geometry column -- verify.
  dplyr::select(-geometry) |>
  dplyr::mutate(patient_pop = total_2020_pop * us_2020_nmdp_gf_sum) |>
  dplyr::arrange(dplyr::desc(patient_pop)) |>
  DT::datatable(
    filter = "top",
    rownames = FALSE,
    extensions = "Buttons",
    options = list(
      scrollX = TRUE,
      pageLength = 11,
      dom = "Bfrtip",
      # Export buttons so readers can download the underlying data.
      buttons = c("csv", "excel")
    )
  )
## Warning in instance$preRenderHook(instance): It seems your data is too big for
## client-side DataTables. You may consider server-side processing:
## https://rstudio.github.io/DT/server.html


